rm(list = ls())

library(lfe)
library(tidyverse)

## Load MP data

dat <- readRDS('data/mp_data.rds')  %>%
  filter(elecper != 8) %>%
  distinct(mp_id, .keep_all = T)

dat$elecper <- NULL

experience_covars <- c('nsdap_member', 
                       'syn_in_gem_bin', 
                       'veteran_ww1',
                       'veteran_ww2', 
                       'capture_ww2',
                       'soviet_capture',
                       'exile_repressed_combined', 
                       'resistance_member',
                       'rel_cath')

covars <- c('year_birth', 'gender', 'mandate', 'dualcand', 'closeness_district_categ')

dat$vote_id <- NULL

## Define votes we want to look at 

voting <- read_dta('data/voting_behavior.dta') %>% 
  filter(!vote_id %in% c('4026')) %>% ## drop Sol vote 
  mutate(vote_deviate_bin = ifelse(vote_deviate %in% c('2', '3'), 1, 0)) %>% 
  dplyr::select(one_of('mp_id', 'vote_deviate_bin', 'vote_id', 'elecper')) %>%
  filter(elecper %in% c('4', '5')) %>%
  mutate(mp_id = as.character(mp_id),
         elecper = as.character(elecper)) %>% 
  left_join(dat, by = c('mp_id'))

## Run analysis 
## Separate analysis for each vote 

vote_nr = unique(voting$vote_id)[4]

res <- lapply(unique(voting$vote_id), function(vote_nr){
  
  dat_ss <- voting %>%
    filter(vote_id == vote_nr)
  
  ## Same specification as for main results 
  
  m1 <-  paste('vote_deviate_bin', '~', paste(experience_covars, collapse = "+"),'+', paste(covars, collapse = "+"), ' |  party_fe + state_id | 0 | ags_cluster') %>% as.formula()
  
  models <- list(m1)
  
  lapply(models, function(m_formula){
    
    felm(m_formula, data = dat_ss) %>%
      broom::tidy() %>%
      filter(term == 'syn_in_gem_bin') %>%
      mutate(treat_var = 'syn_in_gem_bin',
             vote_id = vote_nr)  
    
  }) %>%
    reduce(bind_rows) 
  
}) %>%
  reduce(rbind)  

## Plot distribution of effect estimates 

main_estimate <- 2.02

p1 <- ggplot(res, aes(x = statistic))  + 
  geom_histogram(binwidth = .3, col = 'black') + 
  theme_bw() + 
  geom_vline(xintercept = median(res$statistic), linetype = 'solid', col = 'black') +
  geom_vline(xintercept = main_estimate, linetype = 'dotdash') +
  labs(y = 'Count',
       x = 'Test statistic') +
  scale_x_continuous(limits = c(min(res$statistic) - .5, max(res$statistic) + 1)) +
  geom_curve(aes(x = 2.5, y = 7.5, xend = 2.2, yend = 6), 
             colour = 'black', 
             linewidth = 0.5, 
             curvature = -0.1,
             arrow = arrow(length = unit(0.03, "npc"))) +
  annotate('text', x = 2.75, y = 8.5, 
           size = 4.2,
           label = 'Main specification \nt-statistic') + 
  scale_y_continuous(limits = c(0, 15))


p1
 
res %>% filter(abs(statistic) > 1.96) %>% nrow() / nrow(res)
 
